This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Embed the image file lobby6modularity_yf.png in the knitted document.
knitr::include_graphics("lobby6modularity_yf.png")
Colour key: purple = Dementia; green = Cancer Council; blue = Stroke NSF; black = Heart Foundation; orange = MS Australia; red = MND.
# Embed the image file strokeHF.png in the knitted document.
knitr::include_graphics("strokeHF.png")
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(DT)
## Warning: package 'DT' was built under R version 3.4.3
#library(ggplot2)
library(plotly)
## Loading required package: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Load the workbook lobby6income.xlsx into l6 (readxl is called via its
# namespace, so it does not need to be attached).
l6 <- readxl::read_xlsx(path = "lobby6income.xlsx")
#mortality from abs
#www.abs.gov.au/ausstats/abs@.nsf/Lookup/by%20Subject/3303.0~2016~Main%20Features~Australia's%20leading%20causes%20of%20death,%202016~3
#MND mortality from the MND website for 2015
#https://www.mndaust.asn.au/Get-informed/What-is-MND/Facts-and-figures.aspx
#cancer prevalence estimated from living people with cancer from 1982
#https://canceraustralia.gov.au/affected-cancer/what-cancer/cancer-australia-statistics
#https://strokefoundation.org.au/about-stroke/facts-and-figures-about-stroke
#stroke prevalence from NSF
#scatter plot relationship
#use geom_point for each column of data
#specify colour
#specify shape
# Build the modelling table l6m by dropping rows 7 to 9 of l6.
# NOTE(review): the earlier comment here said "remove last 3 rows ... row 8 to
# 10", while the code has always dropped positions 7:9 -- presumably the
# comment counted the spreadsheet header row; confirm against the workbook.
#
# `%in%` is used rather than `==`: `row_number() == 7:9` recycles the three
# values down the column, which selects rows 7-9 only because 7:9 happens to
# line up with the recycling cycle, and it warns whenever the row count is not
# a multiple of 3. `%in%` tests membership directly and never recycles.
l6m <- l6 %>%
  filter(!(row_number() %in% 7:9))
# Scatter plot of revenue2016 (y) against network properties of l6m.
# Each property is its own point layer with a fixed colour and glyph:
#   nodes -> red "*"      edges -> yellow "="
# NOTE(review): both layers share one x axis; the default axis title
# presumably comes from the first layer (nodes) -- confirm, and consider an
# explicit axis label.
gg <- ggplot(data = l6m, mapping = aes(y = revenue2016)) +
  geom_point(mapping = aes(x = nodes), colour = "red", shape = "*") +
  geom_point(mapping = aes(x = edges), colour = "yellow", shape = "=") +
  # Disabled layers, kept for reference:
  # geom_point(aes(x = pagerank), colour = "blue", shape = "+") +
  # geom_point(aes(x = prevalence), colour = "green", shape = "P") +
  # geom_point(aes(x = death), colour = "purple", shape = "D")
  ggtitle("Network properties versus Revenue")

# Render the plot as an interactive plotly widget so individual point values
# can be inspected in the graph.
ggplotly(p = gg)
#write function to perform regression
#apply function over vector
#return vector
# Regress revenue2016 on nodes using l6m (glm with its default gaussian
# family), then print the model summary.
Fit <- glm(formula = revenue2016 ~ nodes, data = l6m)
summary(Fit)
##
## Call:
## glm(formula = revenue2016 ~ nodes, data = l6m)
##
## Deviance Residuals:
## 1 2 3 4 5 6
## -22057167 58675368 -11541914 -10576766 -17411703 2912181
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 31122960 182864048 0.170 0.873
## nodes -26512 556235 -0.048 0.964
##
## (Dispersion parameter for gaussian family taken to be 1.121512e+15)
##
## Null deviance: 4.4886e+15 on 5 degrees of freedom
## Residual deviance: 4.4860e+15 on 4 degrees of freedom
## AIC: 228.52
##
## Number of Fisher Scoring iterations: 2
# Regress revenue2016 on prevalence using l6m (default gaussian family),
# overwriting the previous Fit, then print the model summary.
Fit <- glm(formula = revenue2016 ~ prevalence, data = l6m)
summary(Fit)
##
## Call:
## glm(formula = revenue2016 ~ prevalence, data = l6m)
##
## Deviance Residuals:
## 1 2 3 4 5 6
## -26805921 54674562 568265 -9094314 -21773001 2430408
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.765e+07 1.765e+07 1.567 0.192
## prevalence -1.737e+01 3.848e+01 -0.452 0.675
##
## (Dispersion parameter for gaussian family taken to be 1.067716e+15)
##
## Null deviance: 4.4886e+15 on 5 degrees of freedom
## Residual deviance: 4.2709e+15 on 4 degrees of freedom
## AIC: 228.22
##
## Number of Fisher Scoring iterations: 2
# Regress revenue2016 on death using l6m (default gaussian family),
# overwriting the previous Fit, then print the model summary.
Fit <- glm(formula = revenue2016 ~ death, data = l6m)
summary(Fit)
##
## Call:
## glm(formula = revenue2016 ~ death, data = l6m)
##
## Deviance Residuals:
## 1 2 3 4 5 6
## -5658047 44759645 -33914438 -6758600 -304660 1876101
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5523723 17866115 0.309 0.773
## death 1260 1012 1.245 0.281
##
## (Dispersion parameter for gaussian family taken to be 8.087299e+14)
##
## Null deviance: 4.4886e+15 on 5 degrees of freedom
## Residual deviance: 3.2349e+15 on 4 degrees of freedom
## AIC: 226.55
##
## Number of Fisher Scoring iterations: 2
# Exclude the outlier: l6m2 is l6m without its second row.
l6m2 <- l6m %>%
  filter(row_number() != 2)
# nodes: refit revenue2016 ~ nodes on the outlier-free table l6m2 and print
# the model summary.
Fit <- glm(formula = revenue2016 ~ nodes, data = l6m2)
summary(Fit)
##
## Call:
## glm(formula = revenue2016 ~ nodes, data = l6m2)
##
## Deviance Residuals:
## 1 2 3 4 5
## -4168931 1684835 2960749 476533 -953186
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -83413426 18136070 -4.599 0.0193 *
## nodes 284254 54657 5.201 0.0138 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 1.004011e+13)
##
## Null deviance: 3.0167e+14 on 4 degrees of freedom
## Residual deviance: 3.0120e+13 on 3 degrees of freedom
## AIC: 167.32
##
## Number of Fisher Scoring iterations: 2
# prevalence: refit revenue2016 ~ prevalence on the outlier-free table l6m2
# and print the model summary.
# NOTE(review): this section was previously labelled "edge", but the formula
# uses prevalence -- confirm which predictor was intended.
Fit <- glm(formula = revenue2016 ~ prevalence, data = l6m2)
summary(Fit)
##
## Call:
## glm(formula = revenue2016 ~ prevalence, data = l6m2)
##
## Deviance Residuals:
## 1 2 3 4 5
## -6871092 -5066836 1057465 -2412283 13292746
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.680e+06 5.783e+06 1.328 0.276
## prevalence 8.370e+00 1.152e+01 0.727 0.520
##
## (Dispersion parameter for gaussian family taken to be 8.550639e+13)
##
## Null deviance: 3.0167e+14 on 4 degrees of freedom
## Residual deviance: 2.5652e+14 on 3 degrees of freedom
## AIC: 178.03
##
## Number of Fisher Scoring iterations: 2
# death: refit revenue2016 ~ death on the outlier-free table l6m2 and print
# the model summary.
Fit <- glm(formula = revenue2016 ~ death, data = l6m2)
summary(Fit)
##
## Call:
## glm(formula = revenue2016 ~ death, data = l6m2)
##
## Deviance Residuals:
## 1 2 3 4 5
## -6687676 -5684961 1521399 -1874062 12725300
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7281372.2 5682566.4 1.281 0.290
## death 299.2 358.4 0.835 0.465
##
## (Dispersion parameter for gaussian family taken to be 8.160127e+13)
##
## Null deviance: 3.0167e+14 on 4 degrees of freedom
## Residual deviance: 2.4480e+14 on 3 degrees of freedom
## AIC: 177.8
##
## Number of Fisher Scoring iterations: 2
# Fit still holds the revenue2016 ~ death model fitted on l6m2, so this
# prints the same summary a second time.
summary(Fit)
##
## Call:
## glm(formula = revenue2016 ~ death, data = l6m2)
##
## Deviance Residuals:
## 1 2 3 4 5
## -6687676 -5684961 1521399 -1874062 12725300
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7281372.2 5682566.4 1.281 0.290
## death 299.2 358.4 0.835 0.465
##
## (Dispersion parameter for gaussian family taken to be 8.160127e+13)
##
## Null deviance: 3.0167e+14 on 4 degrees of freedom
## Residual deviance: 2.4480e+14 on 3 degrees of freedom
## AIC: 177.8
##
## Number of Fisher Scoring iterations: 2
#' Fit a univariate regression of `y` on `x`
#'
#' Thin wrapper around `glm()` called with its default (gaussian) family.
#'
#' @param y Response vector.
#' @param x Predictor vector.
#' @return The fitted model object returned by `glm(y ~ x)`.
univ <- function(y, x) {
  glm(formula = y ~ x)
}
# Display l6m as an interactive table using the DT package.
DT::datatable(data = l6m)
Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.